library(tidyverse)
library(DESeq2)
library(biomaRt)
## Warning: replacing previous import 'utils::findMatches' by
## 'S4Vectors::findMatches' when loading 'AnnotationDbi'
library(clusterProfiler)
library(org.Hs.eg.db)
library(factoextra)
library(vegan)
library(ggrepel)
library(ggsci)
library(clusterProfiler)
library(enrichplot)
library(DT)
setwd("/storage2/Elena/")
table2 <- read_tsv("/storage2/Elena/rnaseqRenombrado/nextflow/star_salmon/salmon.merged.transcript_counts.tsv")
meta <- colnames(table2) %>%
as_tibble() %>%
filter(value != "tx") %>%
filter(value != "gene_id") %>%
rename(sample = value) %>%
separate(sample,c("c1","c2"), remove = F, sep = "\\.") %>%
unite("grupo",c1:c2, sep = "_") %>%
mutate(grupo = str_replace(grupo,"_\\d+$","")) %>%
as_data_frame() %>%
arrange(sample) %>%
column_to_rownames("sample")
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` (with slightly different semantics) to convert to a
## tibble, or `as.data.frame()` to convert to a data frame.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
We import biological annotation from ENSEMBL
mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL",
dataset = "hsapiens_gene_ensembl",
host = 'https://www.ensembl.org')
ttg <- biomaRt::getBM(
attributes = c("ensembl_transcript_id","ensembl_gene_id","external_gene_name", "description",
"entrezgene_id","gene_biotype","transcript_biotype"),
mart = mart)
table_sum <- table2 %>%
pivot_longer(names_to = "Sample", values_to = "counts", -(tx:gene_id)) %>%
rename(ensembl_transcript_id = tx) %>%
inner_join(ttg) %>%
filter(transcript_biotype == "protein_coding") %>%
group_by(ensembl_gene_id,Sample,"external_gene_name","description",
"entrezgene_id","gene_biotype") %>%
summarise(counts = sum(counts)) %>%
ungroup() %>%
group_by(ensembl_gene_id) %>%
mutate(TotalZeros = sum(counts ==0)) %>%
mutate(TotalCounts = sum(counts)) %>%
ungroup() %>%
filter(TotalZeros < 8) %>%
filter(TotalCounts > 50) %>%
dplyr::select(ensembl_gene_id,Sample,counts) %>%
distinct() %>%
pivot_wider(names_from = Sample, values_from = counts)%>%
column_to_rownames("ensembl_gene_id") %>%
round()
## Joining with `by = join_by(ensembl_transcript_id)`
## Warning in inner_join(., ttg): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 283701 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## `summarise()` has grouped output by 'ensembl_gene_id', 'Sample',
## '"external_gene_name"', '"description"', '"entrezgene_id"'. You can override
## using the `.groups` argument.
dds_gene <- DESeqDataSetFromMatrix(countData = table_sum %>% as.matrix(),
colData = meta,
design = ~grupo
)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
dds_gene <- DESeq(dds_gene,sfType = "poscounts", fitType = "mean")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
table2 %>% pivot_longer(names_to = "Sample", values_to = "counts", -(tx:gene_id)) %>%
group_by(Sample) %>%
summarise(TotalCounts = sum(counts)) %>%
full_join(meta %>% as_tibble(rownames = "Sample")) %>%
ggplot(aes(x= Sample, y = TotalCounts, fill = grupo)) + geom_col() + coord_flip() + theme_light()+ labs(title = "Raw Data")
## Joining with `by = join_by(Sample)`
counts(dds_gene, normalized = T) %>% as_tibble(rownames = "ensembl_gene_id") %>% pivot_longer(names_to = "Sample", values_to = "counts", -ensembl_gene_id) %>%
group_by(Sample) %>%
summarise(TotalCounts = sum(counts)) %>%
full_join(meta %>% as_tibble(rownames = "Sample")) %>%
ggplot(aes(x= Sample, y = TotalCounts, fill = grupo)) + geom_col() + coord_flip() + theme_light() + labs(title = "Normalize Data")
## Joining with `by = join_by(Sample)`
counts(dds_gene, normalized = T) %>%
as_tibble(rownames = "ensembl_gene_id") %>%
pivot_longer(names_to = "Sample", values_to = "counts", -ensembl_gene_id) %>%
full_join(meta %>% as_tibble(rownames = "Sample")) %>%
inner_join(ttg) %>%
group_by(Sample,gene_biotype) %>%
summarise(counts = sum(counts)) %>%
ggplot(aes(x = Sample, y = counts, fill = gene_biotype)) +
geom_col(position = "dodge") +
scale_y_log10() + coord_flip()
## Joining with `by = join_by(Sample)`
## Joining with `by = join_by(ensembl_gene_id)`
## Warning in inner_join(., ttg): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 73971 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## `summarise()` has grouped output by 'Sample'. You can override using the
## `.groups` argument.
To check the quality of the sample we test the homogenicity of the groups. In this case we represent the samples using Partial Least Squares Discriminant Analysis (PLS-DA)
library(mixOmics)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:clusterProfiler':
##
## select
## The following object is masked from 'package:biomaRt':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
##
## Loaded mixOmics 6.24.0
## Thank you for using mixOmics!
## Tutorials: http://mixomics.org
## Bookdown vignette: https://mixomicsteam.github.io/Bookdown
## Questions, issues: Follow the prompts at http://mixomics.org/contact-us
## Cite us: citation('mixOmics')
##
## Attaching package: 'mixOmics'
## The following object is masked from 'package:purrr':
##
## map
X = counts(dds_gene, normalized = T) %>% t()
Y = counts(dds_gene, normalized = T) %>%
t() %>%
as_tibble(rownames = "Sample") %>%
inner_join(meta %>%
rownames_to_column("Sample")) %>%
mutate(grupo = as.factor(grupo)) %>%
pull(grupo)
## Joining with `by = join_by(Sample)`
pl <- splsda(X,Y, ncomp = 10, scale = T)
plotIndiv(pl, ind.names = Y, ellipse = TRUE, legend =TRUE)
To test the homogenicity we perform a PERMANOVA test
dist <- counts(dds_gene, normalized = T) %>%
t() %>% vegdist(method = "binomial")
adonis2(dist ~ grupo, data = meta, permutations = 10000)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = dist ~ grupo, data = meta, permutations = 10000)
## Df SumOfSqs R2 F Pr(>F)
## grupo 3 729396 0.61428 4.2467 0.0013 **
## Residual 8 458010 0.38572
## Total 11 1187406 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The homogenicity of the groups is statistically significant so the sample quality is good enough
All the test were performed at gene level and including lncRNA. You can find all the table results at https://github.com/irycisBioinfo/rnaSeq_elena
For each comparison we have performed an Gene Set Enrichment Analysis of GO term and KEGG pathways.
C_4H_vs_Q2_4H <- results(dds_gene, contrast = c("grupo","C_4H","Q2_4H")) %>%
as_tibble(rownames = "ensembl_gene_id") %>%
inner_join(ttg %>% dplyr::select(-ensembl_transcript_id, -transcript_biotype) %>% distinct())
## Joining with `by = join_by(ensembl_gene_id)`
C_4H_vs_Q2_4H %>%
write_tsv("C_4H_vs_Q2_4H.tsv")
C_4H_vs_Q2_4H %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>%
ggplot(aes(x= log2FoldChange,
y = -log10(padj),
color = sig,
size = sqrt(baseMean),
label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 1)+
scale_color_d3() +
theme_light() +
labs(title = "C_4H vs Q2_4H")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13995 rows containing missing values (`geom_text_repel()`).
C_4H_vs_Q2_4H %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>%
ggplot(aes(x= baseMean,
y = log2FoldChange,
color = sig,
size = sqrt(baseMean),
label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 1)+
scale_color_d3() +
theme_light() +
scale_x_log10() +
labs(title = "C_4H vs Q2_4H")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Removed 13995 rows containing missing values (`geom_text_repel()`).
### Funtional Annotation
C_4H_vs_Q2_4H_enrich <- C_4H_vs_Q2_4H %>%
filter(padj < 0.001) %>%
dplyr::select(entrezgene_id) %>%
distinct() %>%
drop_na() %>%
pull(entrezgene_id)
C_4H_vs_Q2_4H_enrich_go <- enrichGO(gene =C_4H_vs_Q2_4H_enrich,
OrgDb = org.Hs.eg.db,
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
if ((C_4H_vs_Q2_4H_enrich_go %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_4H_vs_Q2_4H_enrich_go, showCategory = 20)
} else{
print("No enriched Pathways")
}
C_4H_vs_Q2_4H_enrich_kegg<- enrichKEGG(gene = C_4H_vs_Q2_4H_enrich,
organism = 'hsa',
pvalueCutoff = 0.05)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## --> No gene can be mapped....
## --> Expected input gene ID: 51703,5232,3795,57016,10327,5161
## --> return NULL...
if ((C_4H_vs_Q2_4H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_4H_vs_Q2_4H_enrich_kegg, showCategory = 20)
} else{
print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_8H_vs_Q2_8H <- results(dds_gene, contrast = c("grupo","C_8H","Q2_8H")) %>%
as_tibble(rownames = "ensembl_gene_id") %>%
inner_join(ttg %>% dplyr::select(-ensembl_transcript_id, -transcript_biotype) %>% distinct())
## Joining with `by = join_by(ensembl_gene_id)`
C_8H_vs_Q2_8H %>%
write_tsv("C_8H_vs_Q2_8H.tsv")
C_8H_vs_Q2_8H %>%
mutate(sig = ifelse((padj < 0.001 & abs(log2FoldChange) >1.5) ,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>%
ggplot(aes(x= log2FoldChange,
y = -log10(padj),
color = sig,
size = sqrt(baseMean),
label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 1)+
scale_color_d3() +
theme_light() +
labs(title = "C_8H vs Q2_8H")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13516 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 428 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
C_8H_vs_Q2_8H %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>%
ggplot(aes(x= baseMean,
y = log2FoldChange,
color = sig,
size = sqrt(baseMean),
label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 1)+
scale_color_d3() +
theme_light() +
scale_x_log10()+
labs(title = "C_8H_vs_Q2_8H")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 9341 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 4592 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
### Funtional Annotation
C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%
filter(padj < 0.001 & abs(log2FoldChange) >1.5) %>%
dplyr::select(entrezgene_id) %>%
distinct() %>%
drop_na() %>%
pull(entrezgene_id)
C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
OrgDb = org.Hs.eg.db,
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
print("No enriched Pathways")
}
C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
organism = 'hsa',
pvalueCutoff = 0.05)
if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
print("No enriched Pathways")
}
#### padj <0.01, abs(foldchange>1.5)
C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%
filter(padj < 0.01 & abs(log2FoldChange) >1.5) %>%
dplyr::select(entrezgene_id) %>%
distinct() %>%
drop_na() %>%
pull(entrezgene_id)
C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
OrgDb = org.Hs.eg.db,
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
print("No enriched Pathways")
}
C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
organism = 'hsa',
pvalueCutoff = 0.05)
if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%
filter(padj < 0.01 & log2FoldChange >1.5) %>%
dplyr::select(entrezgene_id) %>%
distinct() %>%
drop_na() %>%
pull(entrezgene_id)
C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
OrgDb = org.Hs.eg.db,
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
organism = 'hsa',
pvalueCutoff = 0.05)
if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%
filter(padj < 0.01 & log2FoldChange <1.5) %>%
dplyr::select(entrezgene_id) %>%
distinct() %>%
drop_na() %>%
pull(entrezgene_id)
C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
OrgDb = org.Hs.eg.db,
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
print("No enriched Pathways")
}
C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
organism = 'hsa',
pvalueCutoff = 0.05)
if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
print("No enriched Pathways")
}
C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%
filter(padj < 0.01 & abs(log2FoldChange) >2) %>%
dplyr::select(entrezgene_id) %>%
distinct() %>%
drop_na() %>%
pull(entrezgene_id)
C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
OrgDb = org.Hs.eg.db,
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
print("No enriched Pathways")
}
C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
organism = 'hsa',
pvalueCutoff = 0.05)
if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%
filter(padj < 0.01 & log2FoldChange >2) %>%
dplyr::select(entrezgene_id) %>%
distinct() %>%
drop_na() %>%
pull(entrezgene_id)
C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
OrgDb = org.Hs.eg.db,
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
organism = 'hsa',
pvalueCutoff = 0.05)
if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
print("No enriched Pathways")
}
#### padj <0.01, foldchange<2
C_8H_vs_Q2_8H_enrich <- C_8H_vs_Q2_8H %>%
filter(padj < 0.01 & log2FoldChange <2) %>%
dplyr::select(entrezgene_id) %>%
distinct() %>%
drop_na() %>%
pull(entrezgene_id)
C_8H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_8H_vs_Q2_8H_enrich,
OrgDb = org.Hs.eg.db,
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
if ((C_8H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
print("No enriched Pathways")
}
C_8H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_8H_vs_Q2_8H_enrich,
organism = 'hsa',
pvalueCutoff = 0.05)
if ((C_8H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_8H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
print("No enriched Pathways")
}
C_4H_vs_C_8H <- results(dds_gene, contrast = c("grupo","C_4H","C_8H")) %>%
as_tibble(rownames = "ensembl_gene_id") %>%
inner_join(ttg %>% dplyr::select(-ensembl_transcript_id, -transcript_biotype) %>% distinct())
## Joining with `by = join_by(ensembl_gene_id)`
C_4H_vs_C_8H %>%
write_tsv("C_4H_vs_C_8H.tsv")
C_4H_vs_C_8H %>%
mutate(sig = ifelse((padj < 0.001 & abs(log2FoldChange) >1.5) ,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>%
ggplot(aes(x= log2FoldChange,
y = -log10(padj),
color = sig,
size = sqrt(baseMean),
label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 1)+
scale_color_d3() +
theme_light() +
labs(title = "C_4H_vs_C_8H") +
facet_wrap(~gene_biotype, scales = "free")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13995 rows containing missing values (`geom_text_repel()`).
C_4H_vs_C_8H %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>%
ggplot(aes(x= baseMean,
y = log2FoldChange,
color = sig,
size = sqrt(baseMean),
label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 1)+
scale_color_d3() +
theme_light() +
scale_x_log10()+
labs(title = "C_4H_vs_C_8H")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13896 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 79 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
### Funtional Annotation
C_4H_vs_C_8H_enrich <- C_4H_vs_C_8H %>%
filter(padj < 0.001 & abs(log2FoldChange) >1.5) %>%
dplyr::select(entrezgene_id) %>%
distinct() %>%
drop_na() %>%
pull(entrezgene_id)
C_4H_vs_C_8H_enrich_go <- enrichGO(gene =C_4H_vs_C_8H_enrich,
OrgDb = org.Hs.eg.db,
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
if ((C_4H_vs_C_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_4H_vs_C_8H_enrich_go, showCategory = 20)
} else{
print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_4H_vs_C_8H_enrich_kegg<- enrichKEGG(gene = C_4H_vs_C_8H_enrich,
organism = 'hsa',
pvalueCutoff = 0.05)
if ((C_4H_vs_C_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_4H_vs_C_8H_enrich_kegg, showCategory = 20)
} else{
print("No enriched Pathways")
}
Q2_4H_vs_Q2_8H <- results(dds_gene, contrast = c("grupo","Q2_4H","Q2_8H")) %>%
as_tibble(rownames = "ensembl_gene_id") %>%
inner_join(ttg %>% dplyr::select(-ensembl_transcript_id, -transcript_biotype) %>% distinct())
## Joining with `by = join_by(ensembl_gene_id)`
Q2_4H_vs_Q2_8H %>%
write_tsv("Q2_4H_vs_Q2_8H.tsv")
Q2_4H_vs_Q2_8H %>%
mutate(sig = ifelse((padj < 0.001 & abs(log2FoldChange) >1.5) ,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>%
ggplot(aes(x= log2FoldChange,
y = -log10(padj),
color = sig,
size = sqrt(baseMean),
label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 1)+
scale_color_d3() +
theme_light() +
labs(title = "Q2_4H_vs_Q2_8H") +
facet_wrap(~gene_biotype, scales = "free")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13881 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 68 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Q2_4H_vs_Q2_8H %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>%
ggplot(aes(x= baseMean,
y = log2FoldChange,
color = sig,
size = sqrt(baseMean),
label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 1)+
scale_color_d3() +
theme_light() +
labs(title = "Q2_4H_vs_Q2_8H")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 11121 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 2864 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
### Funtional Annotation
Q2_4H_vs_Q2_8H_enrich <- Q2_4H_vs_Q2_8H %>%
filter(padj < 0.001 & abs(log2FoldChange) >1.5) %>%
dplyr::select(entrezgene_id) %>%
distinct() %>%
drop_na() %>%
pull(entrezgene_id)
Q2_4H_vs_Q2_8H_enrich_go <- enrichGO(gene =Q2_4H_vs_Q2_8H_enrich,
OrgDb = org.Hs.eg.db,
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
if ((Q2_4H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(Q2_4H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
print("No enriched Pathways")
}
Q2_4H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = Q2_4H_vs_Q2_8H_enrich,
organism = 'hsa',
pvalueCutoff = 0.05)
if ((Q2_4H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(Q2_4H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
print("No enriched Pathways")
}
C_4H_vs_Q2_8H <- results(dds_gene, contrast = c("grupo","C_4H","Q2_8H")) %>%
as_tibble(rownames = "ensembl_gene_id") %>%
inner_join(ttg %>% dplyr::select(-ensembl_transcript_id, -transcript_biotype) %>% distinct())
## Joining with `by = join_by(ensembl_gene_id)`
C_4H_vs_Q2_8H %>%
write_tsv("C_4H_vs_Q2_8H.tsv")
C_4H_vs_Q2_8H %>%
mutate(sig = ifelse((padj < 0.001 & abs(log2FoldChange) >1.5) ,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>%
ggplot(aes(x= log2FoldChange,
y = -log10(padj),
color = sig,
size = sqrt(baseMean),
label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 1)+
scale_color_d3() +
theme_light() +
labs(title = "C_4H_vs_Q2_8H") +
facet_wrap(~gene_biotype, scales = "free")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 13742 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 195 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
C_4H_vs_Q2_8H %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",external_gene_name,NA)) %>%
ggplot(aes(x= baseMean,
y = log2FoldChange,
color = sig,
size = sqrt(baseMean),
label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 1)+
scale_color_d3() +
theme_light() +
scale_x_log10()+
labs(title = "C_4H_vs_Q2_8H")
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 10169 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 3761 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
### Funtional Annotation
C_4H_vs_Q2_8H_enrich <- C_4H_vs_Q2_8H %>%
filter(padj < 0.001 & abs(log2FoldChange) >1.5) %>%
dplyr::select(entrezgene_id) %>%
distinct() %>%
drop_na() %>%
pull(entrezgene_id)
C_4H_vs_Q2_8H_enrich_go <- enrichGO(gene =C_4H_vs_Q2_8H_enrich,
OrgDb = org.Hs.eg.db,
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
if ((C_4H_vs_Q2_8H_enrich_go %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_4H_vs_Q2_8H_enrich_go, showCategory = 20)
} else{
print("No enriched Pathways")
}
## [1] "No enriched Pathways"
C_4H_vs_Q2_8H_enrich_kegg<- enrichKEGG(gene = C_4H_vs_Q2_8H_enrich,
organism = 'hsa',
pvalueCutoff = 0.05)
if ((C_4H_vs_Q2_8H_enrich_kegg %>% as.data.frame() %>% nrow()) > 0 )
{
barplot(C_4H_vs_Q2_8H_enrich_kegg, showCategory = 20)
} else{
print("No enriched Pathways")
}
## [1] "No enriched Pathways"